package com.saret.crawler.parser;

import com.saret.utils.UtfFileHandle;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import java.io.File;
import java.io.FileInputStream;

/**
 * @author biniam.gebremichael
 * @since 12/28/11
 */
public class PdfTextParser {

    /**
     * Parses the given PDF file and returns the text extracted from it.
     *
     * <p>The input stream and the parsed document are both released before
     * this method returns, whether extraction succeeds or fails.
     *
     * @param file the PDF file to read
     * @return the text produced by {@link PDFTextStripper} for the document
     * @throws Exception if the file cannot be opened, parsed, or stripped
     */
    public static String pdf2Text(File file) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            PDFParser parser = new PDFParser(in);
            parser.parse();
            PDDocument document = parser.getPDDocument();
            try {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                return pdfStripper.getText(document);
            } finally {
                // Release the resources held by the parsed document.
                document.close();
            }
        } finally {
            // Closing again is harmless if the parser already closed the stream;
            // this covers the paths where the parser never got that far.
            in.close();
        }
    }

    /**
     * Extracts the text of {@code src} and hands it to
     * {@code UtfFileHandle.write} together with {@code des}.
     *
     * <p>Source files whose length is at most one byte are skipped. This is a
     * best-effort operation: any failure is reported on standard error and is
     * not propagated to the caller.
     *
     * @param src the PDF file to read
     * @param des the destination file passed to {@code UtfFileHandle.write}
     */
    public static void pdf2Text(File src, File des) {
        try {
            if (src.length() > 1) {
                UtfFileHandle.write(des, pdf2Text(src));
            }
        } catch (Exception e) {
            // Deliberately non-fatal; include the files involved so the
            // stack trace can be tied back to a specific conversion.
            System.err.println("Failed to convert PDF " + src + " to text file " + des);
            e.printStackTrace();
        }
    }
}
